The aim is to characterize the human fetal kidney from the kidney
cell atlas. You can find more about the human kidney atlas here: https://www.kidneycellatlas.org/ [1]. The RDS data can be
downloaded using the download link https://datasets.cellxgene.cziscience.com/40ebb8e4-1a25-4a33-b8ff-02d1156e4e9b.rds
The Azimuth-compatible reference has been downloaded and created in the
R script
download-and-create-fetal-kidney-ref.R
Load required packages in the following chunk, if needed. Do not
install packages here; only load them with the library()
function.
library("Seurat")
library(Azimuth)
library(SCpubr)
library(tidyverse)
library(patchwork)
# Seed the random number generator from the notebook parameters
set.seed(params$seed)
# Raise the size limit `future` applies to exported globals
# (850000 MiB expressed in bytes, i.e. 891289600000)
options(future.globals.maxSize = 850000 * 1024^2)
# The base path for the OpenScPCA repository, found by its (hidden) .git directory
repository_base <- rprojroot::find_root(rprojroot::is_git_root)
# The path to this module
module_base <- file.path(repository_base, "analyses", "cell-type-wilms-tumor-06")
The input file
/Users/sjspielman/ALSF/open-scpca/OpenScPCA-analysis/analyses/cell-type-wilms-tumor-06/scratch/fetal_kidney.rds
is the output of the R script
prepare-fetal-references.R.
We will save the results of the differential expression analyses in
results/references/ as 00a_marker_compartment_fetal_kidney_Stewart.csv and 00a_marker_cell-type_fetal_kidney_Stewart.csv. The notebook is
saved in the notebook/00-reference directory.
# Directory that receives the marker-gene tables written by this notebook
path_to_output <- file.path(module_base, "results", "references")
# Create it up front so the later write_csv() calls do not fail when the
# directory is missing; an already existing directory is left untouched
dir.create(path_to_output, recursive = TRUE, showWarnings = FALSE)

# Load the fetal kidney reference from the path given in the notebook parameters
fetal_kidney <- readRDS(params$fetal_kidney_path)

# UMAP of the reference, labelled by compartment
d1 <- do_DimPlot(
  fetal_kidney,
  reduction = "umap",
  dims = c(1, 2),
  group.by = "compartment",
  label = TRUE,
  repel = TRUE
) +
  NoLegend()
# Same embedding, labelled by cell type
d2 <- do_DimPlot(
  fetal_kidney,
  reduction = "umap",
  dims = c(1, 2),
  group.by = "cell_type",
  label = TRUE,
  repel = TRUE
) +
  NoLegend()
# Show the two UMAPs side by side
d1 | d2
Here, we use an unbiased approach to find transcripts that characterize the different compartments and cell types.
This is just to get marker genes of the different populations, in case some could be of interest for the Wilms tumor annotations.
We run DElegate::FindAllMarkers2 to find markers of the different clusters and manually check whether they make sense. DElegate::FindAllMarkers2 is an improved version of Seurat::FindAllMarkers based on a pseudobulk differential expression method. Please check the preprint from Christoph Hafemeister: https://www.biorxiv.org/content/10.1101/2023.03.28.534443v1 and the tool described here: https://github.com/cancerbits/DElegate
# Differential expression with DElegate, grouping cells by compartment
de_results <- DElegate::FindAllMarkers2(fetal_kidney, group_column = "compartment")
## Warning in size + sum(size_args, na.rm = FALSE):
## NAs produced by integer overflow

# Filter the most relevant markers using the notebook thresholds.
# which() drops comparisons that evaluate to NA; plain logical indexing would
# turn each of them into an all-NA row in the displayed table.
is_marker <- de_results$padj < params$padj_threshold &
  de_results$log_fc > params$lfc_threshold &
  de_results$rate1 > params$rate1_threshold
s.markers <- de_results[which(is_marker), ]

# Interactive table of the retained markers with CSV / Excel export buttons
DT::datatable(
  s.markers,
  caption = "marker genes",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel")
  )
)
# Select top 5 genes for heatmap plotting
# Drop rows that contain any missing value before ranking
s.markers <- na.omit(s.markers)
# Keep the 5 highest log fold changes per group; top_n() keeps ties, so a
# group can contribute more than 5 rows. Ungroup so `top5` is a plain table.
top5 <- s.markers %>%
  group_by(group1) %>%
  top_n(n = 5, wt = log_fc) %>%
  ungroup()
# subset for plotting
Idents(fetal_kidney) <- fetal_kidney$compartment
# Downsample to at most 100 cells per identity to keep the heatmap readable
cells <- WhichCells(fetal_kidney, downsample = 100)
ss <- subset(fetal_kidney, cells = cells)
# Only the genes shown in the heatmap need to be scaled
ss <- ScaleData(ss, features = top5$feature)
# Panel 1: UMAP of the full reference, labelled by compartment
p1 <- SCpubr::do_DimPlot(
  fetal_kidney,
  reduction = "umap",
  group.by = "compartment",
  label = TRUE,
  repel = TRUE
) +
  ggtitle("compartment")

# Panel 2: heatmap of the selected marker genes on the downsampled cells
heatmap_palette <- c(
  "#01665e", "#35978f", "darkslategray3", "#f7f7f7",
  "#fee391", "#fec44f", "#F9AD03"
)
p2 <- DoHeatmap(
  ss,
  features = top5$feature,
  cells = cells,
  group.by = "compartment"
) +
  NoLegend() +
  scale_fill_gradientn(colors = heatmap_palette)

# Panel 3: bar chart of the number of metadata rows per compartment
p3 <- ggplot(
  fetal_kidney@meta.data,
  aes(x = compartment, fill = compartment)
) +
  geom_bar() +
  NoLegend()

# Title for the combined figure: first orig.ident value and number of columns
# (cells) in the object
common_title <- sprintf(
  "Unsupervised clustering %s, %d cells",
  fetal_kidney@meta.data$orig.ident[1],
  ncol(fetal_kidney)
)
# Combined figure: UMAP stacked over the bar chart on the left, heatmap on the
# right, with a shared title
show(
  (((p1 / p3) + plot_layout(heights = c(3, 2))) | p2) +
    plot_layout(widths = c(1, 2)) +
    plot_layout(heights = c(3, 1)) +
    plot_annotation(title = common_title)
)

# Save the full (unfiltered) compartment results
write_csv(
  de_results,
  file = file.path(path_to_output, "00a_marker_compartment_fetal_kidney_Stewart.csv")
)

# Differential expression with DElegate, grouping cells by cell type
de_results <- DElegate::FindAllMarkers2(fetal_kidney, group_column = "cell_type")
## Warning in size + sum(size_args, na.rm = FALSE):
## NAs produced by integer overflow
# Filter the most relevant markers using the notebook thresholds.
# which() drops comparisons that evaluate to NA; plain logical indexing would
# turn each of them into an all-NA row in the displayed table.
is_marker <- de_results$padj < params$padj_threshold &
  de_results$log_fc > params$lfc_threshold &
  de_results$rate1 > params$rate1_threshold
s.markers <- de_results[which(is_marker), ]

# Interactive table of the retained markers with CSV / Excel export buttons
DT::datatable(
  s.markers,
  caption = "marker genes",
  extensions = "Buttons",
  options = list(
    dom = "Bfrtip",
    buttons = c("csv", "excel")
  )
)
# Select top 5 genes for heatmap plotting
# Drop rows that contain any missing value before ranking
s.markers <- na.omit(s.markers)
# Keep the 5 highest log fold changes per group; top_n() keeps ties, so a
# group can contribute more than 5 rows. Ungroup so `top5` is a plain table.
top5 <- s.markers %>%
  group_by(group1) %>%
  top_n(n = 5, wt = log_fc) %>%
  ungroup()
# subset for plotting
Idents(fetal_kidney) <- fetal_kidney$cell_type
# Downsample to at most 100 cells per identity to keep the heatmap readable
cells <- WhichCells(fetal_kidney, downsample = 100)
ss <- subset(fetal_kidney, cells = cells)
# Only the genes shown in the heatmap need to be scaled
ss <- ScaleData(ss, features = top5$feature)
# Panel 1: UMAP of the full reference, labelled by cell type (legend hidden)
p1 <- SCpubr::do_DimPlot(
  fetal_kidney,
  reduction = "umap",
  group.by = "cell_type",
  label = TRUE,
  repel = TRUE
) +
  ggtitle("cell_type") +
  NoLegend()

# Panel 2: heatmap of the selected marker genes on the downsampled cells
heatmap_palette <- c(
  "#01665e", "#35978f", "darkslategray3", "#f7f7f7",
  "#fee391", "#fec44f", "#F9AD03"
)
p2 <- DoHeatmap(
  ss,
  features = top5$feature,
  cells = cells,
  group.by = "cell_type"
) +
  NoLegend() +
  scale_fill_gradientn(colors = heatmap_palette)

# Panel 3: bar chart of the number of metadata rows per cell type, with the
# axis labels rotated by 90 degrees
p3 <- ggplot(
  fetal_kidney@meta.data,
  aes(x = cell_type, fill = cell_type)
) +
  geom_bar() +
  NoLegend() +
  scale_x_discrete(guide = guide_axis(angle = 90))

# Title for the combined figure: first orig.ident value and number of columns
# (cells) in the object
common_title <- sprintf(
  "Unsupervised clustering %s, %d cells",
  fetal_kidney@meta.data$orig.ident[1],
  ncol(fetal_kidney)
)
# Combined figure: UMAP stacked over the bar chart on the left, heatmap on the
# right, with a shared title
show(
  (((p1 / p3) + plot_layout(heights = c(3, 2))) | p2) +
    plot_layout(widths = c(1, 1)) +
    plot_layout(heights = c(3, 1)) +
    plot_annotation(title = common_title)
)

# Save the full (unfiltered) cell-type results
write_csv(
  de_results,
  file = file.path(path_to_output, "00a_marker_cell-type_fetal_kidney_Stewart.csv")
)

# Record the R session used to produce this notebook
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS 15.0.1
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices datasets
## [5] utils methods base
##
## other attached packages:
## [1] patchwork_1.2.0 lubridate_1.9.3
## [3] forcats_1.0.0 stringr_1.5.1
## [5] dplyr_1.1.4 purrr_1.0.2
## [7] readr_2.1.5 tidyr_1.3.1
## [9] tibble_3.2.1 ggplot2_3.5.1
## [11] tidyverse_2.0.0 SCpubr_2.0.2
## [13] Azimuth_0.5.0 shinyBS_0.61.1
## [15] Seurat_5.1.0 SeuratObject_5.0.2
## [17] sp_2.1-4
##
## loaded via a namespace (and not attached):
## [1] fs_1.6.4
## [2] ProtGenerics_1.36.0
## [3] matrixStats_1.3.0
## [4] spatstat.sparse_3.1-0
## [5] bitops_1.0-8
## [6] DirichletMultinomial_1.46.0
## [7] TFBSTools_1.42.0
## [8] httr_1.4.7
## [9] RColorBrewer_1.1-3
## [10] tools_4.4.1
## [11] sctransform_0.4.1
## [12] utf8_1.2.4
## [13] R6_2.5.1
## [14] DT_0.33
## [15] lazyeval_0.2.2
## [16] uwot_0.2.2
## [17] rhdf5filters_1.16.0
## [18] withr_3.0.1
## [19] gridExtra_2.3
## [20] progressr_0.14.0
## [21] cli_3.6.3
## [22] Biobase_2.64.0
## [23] spatstat.explore_3.3-2
## [24] fastDummies_1.7.4
## [25] EnsDb.Hsapiens.v86_2.99.0
## [26] shinyjs_2.1.0
## [27] sass_0.4.9
## [28] labeling_0.4.3
## [29] spatstat.data_3.1-2
## [30] ggridges_0.5.6
## [31] pbapply_1.7-2
## [32] yulab.utils_0.1.7
## [33] Rsamtools_2.20.0
## [34] R.utils_2.12.3
## [35] parallelly_1.38.0
## [36] limma_3.60.4
## [37] BSgenome_1.72.0
## [38] rstudioapi_0.16.0
## [39] RSQLite_2.3.7
## [40] gridGraphics_0.5-1
## [41] generics_0.1.3
## [42] BiocIO_1.14.0
## [43] vroom_1.6.5
## [44] crosstalk_1.2.1
## [45] gtools_3.9.5
## [46] ica_1.0-3
## [47] spatstat.random_3.3-1
## [48] googlesheets4_1.1.1
## [49] GO.db_3.19.1
## [50] Matrix_1.7-0
## [51] fansi_1.0.6
## [52] S4Vectors_0.42.1
## [53] abind_1.4-5
## [54] R.methodsS3_1.8.2
## [55] lifecycle_1.0.4
## [56] edgeR_4.2.1
## [57] yaml_2.3.10
## [58] SummarizedExperiment_1.34.0
## [59] rhdf5_2.48.0
## [60] SparseArray_1.4.8
## [61] Rtsne_0.17
## [62] grid_4.4.1
## [63] blob_1.2.4
## [64] promises_1.3.0
## [65] shinydashboard_0.7.2
## [66] crayon_1.5.3
## [67] pwalign_1.0.0
## [68] miniUI_0.1.1.1
## [69] lattice_0.22-6
## [70] cowplot_1.1.3
## [71] GenomicFeatures_1.56.0
## [72] annotate_1.82.0
## [73] KEGGREST_1.44.1
## [74] knitr_1.48
## [75] pillar_1.9.0
## [76] GenomicRanges_1.56.1
## [77] rjson_0.2.22
## [78] future.apply_1.11.2
## [79] codetools_0.2-20
## [80] fastmatch_1.1-4
## [81] leiden_0.4.3.1
## [82] glue_1.7.0
## [83] spatstat.univar_3.0-0
## [84] data.table_1.16.0
## [85] vctrs_0.6.5
## [86] png_0.1-8
## [87] spam_2.10-0
## [88] cellranger_1.1.0
## [89] gtable_0.3.5
## [90] poweRlaw_0.80.0
## [91] assertthat_0.2.1
## [92] cachem_1.1.0
## [93] xfun_0.47
## [94] Signac_1.14.0
## [95] S4Arrays_1.4.1
## [96] mime_0.12
## [97] pracma_2.4.4
## [98] survival_3.7-0
## [99] DElegate_1.2.1
## [100] gargle_1.5.2
## [101] RcppRoll_0.3.1
## [102] statmod_1.5.0
## [103] fitdistrplus_1.2-1
## [104] ROCR_1.0-11
## [105] nlme_3.1-166
## [106] bit64_4.0.5
## [107] RcppAnnoy_0.0.22
## [108] rprojroot_2.0.4
## [109] GenomeInfoDb_1.40.1
## [110] bslib_0.8.0
## [111] irlba_2.3.5.1
## [112] KernSmooth_2.23-24
## [113] SeuratDisk_0.0.0.9021
## [114] colorspace_2.1-1
## [115] seqLogo_1.70.0
## [116] BiocGenerics_0.50.0
## [117] DBI_1.2.3
## [118] tidyselect_1.2.1
## [119] bit_4.0.5
## [120] compiler_4.4.1
## [121] curl_5.2.2
## [122] hdf5r_1.3.11
## [123] DelayedArray_0.30.1
## [124] plotly_4.10.4
## [125] rtracklayer_1.64.0
## [126] scales_1.3.0
## [127] caTools_1.18.2
## [128] lmtest_0.9-40
## [129] rappdirs_0.3.3
## [130] digest_0.6.37
## [131] goftest_1.2-3
## [132] presto_1.0.0
## [133] spatstat.utils_3.1-0
## [134] rmarkdown_2.28
## [135] XVector_0.44.0
## [136] htmltools_0.5.8.1
## [137] pkgconfig_2.0.3
## [138] sparseMatrixStats_1.16.0
## [139] MatrixGenerics_1.16.0
## [140] highr_0.11
## [141] fastmap_1.2.0
## [142] ensembldb_2.28.1
## [143] rlang_1.1.4
## [144] htmlwidgets_1.6.4
## [145] UCSC.utils_1.0.0
## [146] shiny_1.9.1
## [147] jquerylib_0.1.4
## [148] farver_2.1.2
## [149] zoo_1.8-12
## [150] jsonlite_1.8.8
## [151] BiocParallel_1.38.0
## [152] R.oo_1.26.0
## [153] RCurl_1.98-1.16
## [154] magrittr_2.0.3
## [155] ggplotify_0.1.2
## [156] GenomeInfoDbData_1.2.12
## [157] dotCall64_1.1-1
## [158] Rhdf5lib_1.26.0
## [159] munsell_0.5.1
## [160] Rcpp_1.0.13
## [161] viridis_0.6.5
## [162] reticulate_1.38.0
## [163] stringi_1.8.4
## [164] zlibbioc_1.50.0
## [165] MASS_7.3-61
## [166] plyr_1.8.9
## [167] parallel_4.4.1
## [168] listenv_0.9.1
## [169] ggrepel_0.9.5
## [170] deldir_2.0-4
## [171] CNEr_1.40.0
## [172] Biostrings_2.72.1
## [173] splines_4.4.1
## [174] tensor_1.5
## [175] hms_1.1.3
## [176] locfit_1.5-9.10
## [177] BSgenome.Hsapiens.UCSC.hg38_1.4.5
## [178] igraph_2.0.3
## [179] spatstat.geom_3.3-2
## [180] RcppHNSW_0.6.0
## [181] reshape2_1.4.4
## [182] stats4_4.4.1
## [183] TFMPvalue_0.0.9
## [184] XML_3.99-0.17
## [185] evaluate_0.24.0
## [186] renv_1.0.7
## [187] BiocManager_1.30.25
## [188] JASPAR2020_0.99.10
## [189] tzdb_0.4.0
## [190] httpuv_1.6.15
## [191] RANN_2.6.2
## [192] polyclip_1.10-7
## [193] future_1.34.0
## [194] SeuratData_0.2.2.9001
## [195] scattermore_1.2
## [196] xtable_1.8-4
## [197] restfulr_0.0.15
## [198] AnnotationFilter_1.28.0
## [199] RSpectra_0.16-2
## [200] later_1.3.2
## [201] viridisLite_0.4.2
## [202] googledrive_2.1.1
## [203] memoise_2.0.1
## [204] AnnotationDbi_1.66.0
## [205] GenomicAlignments_1.40.0
## [206] IRanges_2.38.1
## [207] cluster_2.1.6
## [208] timechange_0.3.0
## [209] globals_0.16.3